// generated by "go run gen.go". DO NOT EDIT.

package draw

import (
	"image"
	"image/color"
	"math"

	"golang.org/x/image/math/f64"
)

func (z nnInterpolator) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, op Op, opts *Options) {
	// Try to simplify a Scale to a Copy when DstMask is not specified.
	// If DstMask is not nil, Copy will call Scale back with same dr and sr, and cause stack overflow.
	if dr.Size() == sr.Size() && (opts == nil || opts.DstMask == nil) {
		Copy(dst, dr.Min, src, sr, op, opts)
		return
	}

	var o Options
	if opts != nil {
		o = *opts
	}

	// adr is the affected destination pixels.
	adr := dst.Bounds().Intersect(dr)
	adr, o.DstMask = clipAffectedDestRect(adr, o.DstMask, o.DstMaskP)
	if adr.Empty() || sr.Empty() {
		return
	}
	// Make adr relative to dr.Min.
	adr = adr.Sub(dr.Min)
	if op == Over && o.SrcMask == nil && opaque(src) {
		op = Src
	}

	// sr is the source pixels. If it extends beyond the src bounds,
	// we cannot use the type-specific fast paths, as they access
	// the Pix fields directly without bounds checking.
	//
	// Similarly, the fast paths assume that the masks are nil.
	if o.DstMask != nil || o.SrcMask != nil || !sr.In(src.Bounds()) {
		switch op {
		case Over:
			z.scale_Image_Image_Over(dst, dr, adr, src, sr, &o)
		case Src:
			z.scale_Image_Image_Src(dst, dr, adr, src, sr, &o)
		}
	} else if _, ok := src.(*image.Uniform); ok {
		Draw(dst, dr, src, src.Bounds().Min, op)
	} else {
		switch op {
		case Over:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.NRGBA:
					z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr, &o)
				case *image.RGBA:
					z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr, &o)
				default:
					z.scale_RGBA_Image_Over(dst, dr, adr, src, sr, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.scale_Image_Image_Over(dst, dr, adr, src, sr, &o)
				}
			}
		case Src:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.Gray:
					z.scale_RGBA_Gray_Src(dst, dr, adr, src, sr, &o)
				case *image.NRGBA:
					z.scale_RGBA_NRGBA_Src(dst, dr, adr, src, sr, &o)
				case *image.RGBA:
					z.scale_RGBA_RGBA_Src(dst, dr, adr, src, sr, &o)
				case *image.YCbCr:
					switch src.SubsampleRatio {
					default:
						z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio444:
						z.scale_RGBA_YCbCr444_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio422:
						z.scale_RGBA_YCbCr422_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio420:
						z.scale_RGBA_YCbCr420_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio440:
						z.scale_RGBA_YCbCr440_Src(dst, dr, adr, src, sr, &o)
					}
				default:
					z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.scale_Image_Image_Src(dst, dr, adr, src, sr, &o)
				}
			}
		}
	}
}

func (z nnInterpolator) Transform(dst Image, s2d f64.Aff3, src image.Image, sr image.Rectangle, op Op, opts *Options) {
	// Try to simplify a Transform to a Copy.
	if s2d[0] == 1 && s2d[1] == 0 && s2d[3] == 0 && s2d[4] == 1 {
		dx := int(s2d[2])
		dy := int(s2d[5])
		if float64(dx) == s2d[2] && float64(dy) == s2d[5] {
			Copy(dst, image.Point{X: sr.Min.X + dx, Y: sr.Min.X + dy}, src, sr, op, opts)
			return
		}
	}

	var o Options
	if opts != nil {
		o = *opts
	}

	dr := transformRect(&s2d, &sr)
	// adr is the affected destination pixels.
	adr := dst.Bounds().Intersect(dr)
	adr, o.DstMask = clipAffectedDestRect(adr, o.DstMask, o.DstMaskP)
	if adr.Empty() || sr.Empty() {
		return
	}
	if op == Over && o.SrcMask == nil && opaque(src) {
		op = Src
	}

	d2s := invert(&s2d)
	// bias is a translation of the mapping from dst coordinates to src
	// coordinates such that the latter temporarily have non-negative X
	// and Y coordinates. This allows us to write int(f) instead of
	// int(math.Floor(f)), since "round to zero" and "round down" are
	// equivalent when f >= 0, but the former is much cheaper. The X--
	// and Y-- are because the TransformLeaf methods have a "sx -= 0.5"
	// adjustment.
	bias := transformRect(&d2s, &adr).Min
	bias.X--
	bias.Y--
	d2s[2] -= float64(bias.X)
	d2s[5] -= float64(bias.Y)
	// Make adr relative to dr.Min.
	adr = adr.Sub(dr.Min)
	// sr is the source pixels. If it extends beyond the src bounds,
	// we cannot use the type-specific fast paths, as they access
	// the Pix fields directly without bounds checking.
	//
	// Similarly, the fast paths assume that the masks are nil.
	if o.DstMask != nil || o.SrcMask != nil || !sr.In(src.Bounds()) {
		switch op {
		case Over:
			z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
		case Src:
			z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
		}
	} else if u, ok := src.(*image.Uniform); ok {
		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
	} else {
		switch op {
		case Over:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.NRGBA:
					z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.RGBA:
					z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				default:
					z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			}
		case Src:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.Gray:
					z.transform_RGBA_Gray_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.NRGBA:
					z.transform_RGBA_NRGBA_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.RGBA:
					z.transform_RGBA_RGBA_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.YCbCr:
					switch src.SubsampleRatio {
					default:
						z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio444:
						z.transform_RGBA_YCbCr444_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio422:
						z.transform_RGBA_YCbCr422_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio420:
						z.transform_RGBA_YCbCr420_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio440:
						z.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					}
				default:
					z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			}
		}
	}
}

func (nnInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.Gray, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx) - src.Rect.Min.X)
			pr := uint32(src.Pix[pi]) * 0x101
			out := uint8(pr >> 8)
			dst.Pix[d+0] = out
			dst.Pix[d+1] = out
			dst.Pix[d+2] = out
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) scale_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.NRGBA, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx)-src.Rect.Min.X)*4
			pa := uint32(src.Pix[pi+3]) * 0x101
			pr := uint32(src.Pix[pi+0]) * pa / 0xff
			pg := uint32(src.Pix[pi+1]) * pa / 0xff
			pb := uint32(src.Pix[pi+2]) * pa / 0xff
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (nnInterpolator) scale_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.NRGBA, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx)-src.Rect.Min.X)*4
			pa := uint32(src.Pix[pi+3]) * 0x101
			pr := uint32(src.Pix[pi+0]) * pa / 0xff
			pg := uint32(src.Pix[pi+1]) * pa / 0xff
			pb := uint32(src.Pix[pi+2]) * pa / 0xff
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (nnInterpolator) scale_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.RGBA, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx)-src.Rect.Min.X)*4
			pr := uint32(src.Pix[pi+0]) * 0x101
			pg := uint32(src.Pix[pi+1]) * 0x101
			pb := uint32(src.Pix[pi+2]) * 0x101
			pa := uint32(src.Pix[pi+3]) * 0x101
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (nnInterpolator) scale_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.RGBA, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx)-src.Rect.Min.X)*4
			pr := uint32(src.Pix[pi+0]) * 0x101
			pg := uint32(src.Pix[pi+1]) * 0x101
			pb := uint32(src.Pix[pi+2]) * 0x101
			pa := uint32(src.Pix[pi+3]) * 0x101
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (nnInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
			pj := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) scale_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
			pj := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) scale_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
			pj := ((sr.Min.Y+int(sy))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pi := (sr.Min.Y+int(sy)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx) - src.Rect.Min.X)
			pj := ((sr.Min.Y+int(sy))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pr, pg, pb, pa := src.At(sr.Min.X+int(sx), sr.Min.Y+int(sy)).RGBA()
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (nnInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pr, pg, pb, pa := src.At(sr.Min.X+int(sx), sr.Min.Y+int(sy)).RGBA()
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (nnInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pr, pg, pb, pa := src.At(sr.Min.X+int(sx), sr.Min.Y+int(sy)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx), smp.Y+sr.Min.Y+int(sy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
			if dstMask != nil {
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			pa1 := 0xffff - pa
			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
			dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
			dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
		}
	}
}

func (nnInterpolator) scale_Image_Image_Src(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	dw2 := uint64(dr.Dx()) * 2
	dh2 := uint64(dr.Dy()) * 2
	sw := uint64(sr.Dx())
	sh := uint64(sr.Dy())
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (2*uint64(dy) + 1) * sh / dh2
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			sx := (2*uint64(dx) + 1) * sw / dw2
			pr, pg, pb, pa := src.At(sr.Min.X+int(sx), sr.Min.Y+int(sy)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx), smp.Y+sr.Min.Y+int(sy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			if dstMask != nil {
				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
				pa1 := 0xffff - ma
				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			} else {
				dstColorRGBA64.R = uint16(pr)
				dstColorRGBA64.G = uint16(pg)
				dstColorRGBA64.B = uint16(pb)
				dstColorRGBA64.A = uint16(pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			}
		}
	}
}

func (nnInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
			pr := uint32(src.Pix[pi]) * 0x101
			out := uint8(pr >> 8)
			dst.Pix[d+0] = out
			dst.Pix[d+1] = out
			dst.Pix[d+2] = out
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			pa := uint32(src.Pix[pi+3]) * 0x101
			pr := uint32(src.Pix[pi+0]) * pa / 0xff
			pg := uint32(src.Pix[pi+1]) * pa / 0xff
			pb := uint32(src.Pix[pi+2]) * pa / 0xff
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (nnInterpolator) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			pa := uint32(src.Pix[pi+3]) * 0x101
			pr := uint32(src.Pix[pi+0]) * pa / 0xff
			pg := uint32(src.Pix[pi+1]) * pa / 0xff
			pb := uint32(src.Pix[pi+2]) * pa / 0xff
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (nnInterpolator) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			pr := uint32(src.Pix[pi+0]) * 0x101
			pg := uint32(src.Pix[pi+1]) * 0x101
			pb := uint32(src.Pix[pi+2]) * 0x101
			pa := uint32(src.Pix[pi+3]) * 0x101
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (nnInterpolator) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			pr := uint32(src.Pix[pi+0]) * 0x101
			pg := uint32(src.Pix[pi+1]) * 0x101
			pb := uint32(src.Pix[pi+2]) * 0x101
			pa := uint32(src.Pix[pi+3]) * 0x101
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (nnInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			pj := (sy0-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			pj := (sy0-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			pj := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pi := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			pj := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			pyy1 := int(src.Y[pi]) * 0x10101
			pcb1 := int(src.Cb[pj]) - 128
			pcr1 := int(src.Cr[pj]) - 128
			pr := (pyy1 + 91881*pcr1) >> 8
			pg := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
			pb := (pyy1 + 116130*pcb1) >> 8
			if pr < 0 {
				pr = 0
			} else if pr > 0xffff {
				pr = 0xffff
			}
			if pg < 0 {
				pg = 0
			} else if pg > 0xffff {
				pg = 0xffff
			}
			if pb < 0 {
				pb = 0
			} else if pb > 0xffff {
				pb = 0xffff
			}
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (nnInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pr, pg, pb, pa := src.At(sx0, sy0).RGBA()
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (nnInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pr, pg, pb, pa := src.At(sx0, sy0).RGBA()
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (nnInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pr, pg, pb, pa := src.At(sx0, sy0).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
			if dstMask != nil {
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			pa1 := 0xffff - pa
			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
			dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
			dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
		}
	}
}

func (nnInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx0 := int(d2s[0]*dxf+d2s[1]*dyf+d2s[2]) + bias.X
			sy0 := int(d2s[3]*dxf+d2s[4]*dyf+d2s[5]) + bias.Y
			if !(image.Point{sx0, sy0}).In(sr) {
				continue
			}
			pr, pg, pb, pa := src.At(sx0, sy0).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			if dstMask != nil {
				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
				pa1 := 0xffff - ma
				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			} else {
				dstColorRGBA64.R = uint16(pr)
				dstColorRGBA64.G = uint16(pg)
				dstColorRGBA64.B = uint16(pb)
				dstColorRGBA64.A = uint16(pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			}
		}
	}
}

func (z ablInterpolator) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, op Op, opts *Options) {
	// Try to simplify a Scale to a Copy when DstMask is not specified.
	// If DstMask is not nil, Copy will call Scale back with same dr and sr, and cause stack overflow.
	if dr.Size() == sr.Size() && (opts == nil || opts.DstMask == nil) {
		Copy(dst, dr.Min, src, sr, op, opts)
		return
	}

	var o Options
	if opts != nil {
		o = *opts
	}

	// adr is the affected destination pixels.
	adr := dst.Bounds().Intersect(dr)
	adr, o.DstMask = clipAffectedDestRect(adr, o.DstMask, o.DstMaskP)
	if adr.Empty() || sr.Empty() {
		return
	}
	// Make adr relative to dr.Min.
	adr = adr.Sub(dr.Min)
	if op == Over && o.SrcMask == nil && opaque(src) {
		op = Src
	}

	// sr is the source pixels. If it extends beyond the src bounds,
	// we cannot use the type-specific fast paths, as they access
	// the Pix fields directly without bounds checking.
	//
	// Similarly, the fast paths assume that the masks are nil.
	if o.DstMask != nil || o.SrcMask != nil || !sr.In(src.Bounds()) {
		switch op {
		case Over:
			z.scale_Image_Image_Over(dst, dr, adr, src, sr, &o)
		case Src:
			z.scale_Image_Image_Src(dst, dr, adr, src, sr, &o)
		}
	} else if _, ok := src.(*image.Uniform); ok {
		Draw(dst, dr, src, src.Bounds().Min, op)
	} else {
		switch op {
		case Over:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.NRGBA:
					z.scale_RGBA_NRGBA_Over(dst, dr, adr, src, sr, &o)
				case *image.RGBA:
					z.scale_RGBA_RGBA_Over(dst, dr, adr, src, sr, &o)
				default:
					z.scale_RGBA_Image_Over(dst, dr, adr, src, sr, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.scale_Image_Image_Over(dst, dr, adr, src, sr, &o)
				}
			}
		case Src:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.Gray:
					z.scale_RGBA_Gray_Src(dst, dr, adr, src, sr, &o)
				case *image.NRGBA:
					z.scale_RGBA_NRGBA_Src(dst, dr, adr, src, sr, &o)
				case *image.RGBA:
					z.scale_RGBA_RGBA_Src(dst, dr, adr, src, sr, &o)
				case *image.YCbCr:
					switch src.SubsampleRatio {
					default:
						z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio444:
						z.scale_RGBA_YCbCr444_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio422:
						z.scale_RGBA_YCbCr422_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio420:
						z.scale_RGBA_YCbCr420_Src(dst, dr, adr, src, sr, &o)
					case image.YCbCrSubsampleRatio440:
						z.scale_RGBA_YCbCr440_Src(dst, dr, adr, src, sr, &o)
					}
				default:
					z.scale_RGBA_Image_Src(dst, dr, adr, src, sr, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.scale_Image_Image_Src(dst, dr, adr, src, sr, &o)
				}
			}
		}
	}
}

func (z ablInterpolator) Transform(dst Image, s2d f64.Aff3, src image.Image, sr image.Rectangle, op Op, opts *Options) {
	// Try to simplify a Transform to a Copy.
	if s2d[0] == 1 && s2d[1] == 0 && s2d[3] == 0 && s2d[4] == 1 {
		dx := int(s2d[2])
		dy := int(s2d[5])
		if float64(dx) == s2d[2] && float64(dy) == s2d[5] {
			Copy(dst, image.Point{X: sr.Min.X + dx, Y: sr.Min.X + dy}, src, sr, op, opts)
			return
		}
	}

	var o Options
	if opts != nil {
		o = *opts
	}

	dr := transformRect(&s2d, &sr)
	// adr is the affected destination pixels.
	adr := dst.Bounds().Intersect(dr)
	adr, o.DstMask = clipAffectedDestRect(adr, o.DstMask, o.DstMaskP)
	if adr.Empty() || sr.Empty() {
		return
	}
	if op == Over && o.SrcMask == nil && opaque(src) {
		op = Src
	}

	d2s := invert(&s2d)
	// bias is a translation of the mapping from dst coordinates to src
	// coordinates such that the latter temporarily have non-negative X
	// and Y coordinates. This allows us to write int(f) instead of
	// int(math.Floor(f)), since "round to zero" and "round down" are
	// equivalent when f >= 0, but the former is much cheaper. The X--
	// and Y-- are because the TransformLeaf methods have a "sx -= 0.5"
	// adjustment.
	bias := transformRect(&d2s, &adr).Min
	bias.X--
	bias.Y--
	d2s[2] -= float64(bias.X)
	d2s[5] -= float64(bias.Y)
	// Make adr relative to dr.Min.
	adr = adr.Sub(dr.Min)
	// sr is the source pixels. If it extends beyond the src bounds,
	// we cannot use the type-specific fast paths, as they access
	// the Pix fields directly without bounds checking.
	//
	// Similarly, the fast paths assume that the masks are nil.
	if o.DstMask != nil || o.SrcMask != nil || !sr.In(src.Bounds()) {
		switch op {
		case Over:
			z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
		case Src:
			z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
		}
	} else if u, ok := src.(*image.Uniform); ok {
		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
	} else {
		switch op {
		case Over:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.NRGBA:
					z.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.RGBA:
					z.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				default:
					z.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			}
		case Src:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.Gray:
					z.transform_RGBA_Gray_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.NRGBA:
					z.transform_RGBA_NRGBA_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.RGBA:
					z.transform_RGBA_RGBA_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				case *image.YCbCr:
					switch src.SubsampleRatio {
					default:
						z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio444:
						z.transform_RGBA_YCbCr444_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio422:
						z.transform_RGBA_YCbCr422_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio420:
						z.transform_RGBA_YCbCr420_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					case image.YCbCrSubsampleRatio440:
						z.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
					}
				default:
					z.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			default:
				switch src := src.(type) {
				default:
					z.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, &o)
				}
			}
		}
	}
}

func (ablInterpolator) scale_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.Gray, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s00ru := uint32(src.Pix[s00i]) * 0x101
			s00r := float64(s00ru)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s10ru := uint32(src.Pix[s10i]) * 0x101
			s10r := float64(s10ru)
			s10r = xFrac1*s00r + xFrac0*s10r
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s01ru := uint32(src.Pix[s01i]) * 0x101
			s01r := float64(s01ru)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s11ru := uint32(src.Pix[s11i]) * 0x101
			s11r := float64(s11ru)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11r = yFrac1*s10r + yFrac0*s11r
			pr := uint32(s11r)
			out := uint8(pr >> 8)
			dst.Pix[d+0] = out
			dst.Pix[d+1] = out
			dst.Pix[d+2] = out
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) scale_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.NRGBA, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00ru := uint32(src.Pix[s00i+0]) * s00au / 0xff
			s00gu := uint32(src.Pix[s00i+1]) * s00au / 0xff
			s00bu := uint32(src.Pix[s00i+2]) * s00au / 0xff
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10ru := uint32(src.Pix[s10i+0]) * s10au / 0xff
			s10gu := uint32(src.Pix[s10i+1]) * s10au / 0xff
			s10bu := uint32(src.Pix[s10i+2]) * s10au / 0xff
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
			s01gu := uint32(src.Pix[s01i+1]) * s01au / 0xff
			s01bu := uint32(src.Pix[s01i+2]) * s01au / 0xff
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11ru := uint32(src.Pix[s11i+0]) * s11au / 0xff
			s11gu := uint32(src.Pix[s11i+1]) * s11au / 0xff
			s11bu := uint32(src.Pix[s11i+2]) * s11au / 0xff
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (ablInterpolator) scale_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.NRGBA, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00ru := uint32(src.Pix[s00i+0]) * s00au / 0xff
			s00gu := uint32(src.Pix[s00i+1]) * s00au / 0xff
			s00bu := uint32(src.Pix[s00i+2]) * s00au / 0xff
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10ru := uint32(src.Pix[s10i+0]) * s10au / 0xff
			s10gu := uint32(src.Pix[s10i+1]) * s10au / 0xff
			s10bu := uint32(src.Pix[s10i+2]) * s10au / 0xff
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
			s01gu := uint32(src.Pix[s01i+1]) * s01au / 0xff
			s01bu := uint32(src.Pix[s01i+2]) * s01au / 0xff
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11ru := uint32(src.Pix[s11i+0]) * s11au / 0xff
			s11gu := uint32(src.Pix[s11i+1]) * s11au / 0xff
			s11bu := uint32(src.Pix[s11i+2]) * s11au / 0xff
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (ablInterpolator) scale_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, src *image.RGBA, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s00ru := uint32(src.Pix[s00i+0]) * 0x101
			s00gu := uint32(src.Pix[s00i+1]) * 0x101
			s00bu := uint32(src.Pix[s00i+2]) * 0x101
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s10ru := uint32(src.Pix[s10i+0]) * 0x101
			s10gu := uint32(src.Pix[s10i+1]) * 0x101
			s10bu := uint32(src.Pix[s10i+2]) * 0x101
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s01ru := uint32(src.Pix[s01i+0]) * 0x101
			s01gu := uint32(src.Pix[s01i+1]) * 0x101
			s01bu := uint32(src.Pix[s01i+2]) * 0x101
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s11ru := uint32(src.Pix[s11i+0]) * 0x101
			s11gu := uint32(src.Pix[s11i+1]) * 0x101
			s11bu := uint32(src.Pix[s11i+2]) * 0x101
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (ablInterpolator) scale_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.RGBA, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s00ru := uint32(src.Pix[s00i+0]) * 0x101
			s00gu := uint32(src.Pix[s00i+1]) * 0x101
			s00bu := uint32(src.Pix[s00i+2]) * 0x101
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s10ru := uint32(src.Pix[s10i+0]) * 0x101
			s10gu := uint32(src.Pix[s10i+1]) * 0x101
			s10bu := uint32(src.Pix[s10i+2]) * 0x101
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx0)-src.Rect.Min.X)*4
			s01ru := uint32(src.Pix[s01i+0]) * 0x101
			s01gu := uint32(src.Pix[s01i+1]) * 0x101
			s01bu := uint32(src.Pix[s01i+2]) * 0x101
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(sx1)-src.Rect.Min.X)*4
			s11ru := uint32(src.Pix[s11i+0]) * 0x101
			s11gu := uint32(src.Pix[s11i+1]) * 0x101
			s11bu := uint32(src.Pix[s11i+2]) * 0x101
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (ablInterpolator) scale_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s00j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s10j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s11j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) scale_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s00j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s10j := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s01j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s11j := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) scale_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s00j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s10j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx0))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s11j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(sx1))/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) scale_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rectangle, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s00j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sr.Min.Y+int(sy0)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s10j := ((sr.Min.Y+int(sy0))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)
			s01j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx0) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sr.Min.Y+int(sy1)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)
			s11j := ((sr.Min.Y+int(sy1))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(sx1) - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) scale_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)).RGBA()
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)).RGBA()
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)).RGBA()
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (ablInterpolator) scale_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)).RGBA()
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)).RGBA()
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)).RGBA()
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (ablInterpolator) scale_Image_Image_Over(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA()
				s00ru = s00ru * ma / 0xffff
				s00gu = s00gu * ma / 0xffff
				s00bu = s00bu * ma / 0xffff
				s00au = s00au * ma / 0xffff
			}
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA()
				s10ru = s10ru * ma / 0xffff
				s10gu = s10gu * ma / 0xffff
				s10bu = s10bu * ma / 0xffff
				s10au = s10au * ma / 0xffff
			}
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
				s01ru = s01ru * ma / 0xffff
				s01gu = s01gu * ma / 0xffff
				s01bu = s01bu * ma / 0xffff
				s01au = s01au * ma / 0xffff
			}
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy1)).RGBA()
				s11ru = s11ru * ma / 0xffff
				s11gu = s11gu * ma / 0xffff
				s11bu = s11bu * ma / 0xffff
				s11au = s11au * ma / 0xffff
			}
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
			if dstMask != nil {
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			pa1 := 0xffff - pa
			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
			dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
			dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
		}
	}
}

func (ablInterpolator) scale_Image_Image_Src(dst Image, dr, adr image.Rectangle, src image.Image, sr image.Rectangle, opts *Options) {
	sw := int32(sr.Dx())
	sh := int32(sr.Dy())
	yscale := float64(sh) / float64(dr.Dy())
	xscale := float64(sw) / float64(dr.Dx())
	swMinus1, shMinus1 := sw-1, sh-1
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		sy := (float64(dy)+0.5)*yscale - 0.5
		// If sy < 0, we will clamp sy0 to 0 anyway, so it doesn't matter if
		// we say int32(sy) instead of int32(math.Floor(sy)). Similarly for
		// sx, below.
		sy0 := int32(sy)
		yFrac0 := sy - float64(sy0)
		yFrac1 := 1 - yFrac0
		sy1 := sy0 + 1
		if sy < 0 {
			sy0, sy1 = 0, 0
			yFrac0, yFrac1 = 0, 1
		} else if sy1 > shMinus1 {
			sy0, sy1 = shMinus1, shMinus1
			yFrac0, yFrac1 = 1, 0
		}

		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			sx := (float64(dx)+0.5)*xscale - 0.5
			sx0 := int32(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx1 := sx0 + 1
			if sx < 0 {
				sx0, sx1 = 0, 0
				xFrac0, xFrac1 = 0, 1
			} else if sx1 > swMinus1 {
				sx0, sx1 = swMinus1, swMinus1
				xFrac0, xFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy0)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy0)).RGBA()
				s00ru = s00ru * ma / 0xffff
				s00gu = s00gu * ma / 0xffff
				s00bu = s00bu * ma / 0xffff
				s00au = s00au * ma / 0xffff
			}
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy0)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy0)).RGBA()
				s10ru = s10ru * ma / 0xffff
				s10gu = s10gu * ma / 0xffff
				s10bu = s10bu * ma / 0xffff
				s10au = s10au * ma / 0xffff
			}
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sr.Min.X+int(sx0), sr.Min.Y+int(sy1)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx0), smp.Y+sr.Min.Y+int(sy1)).RGBA()
				s01ru = s01ru * ma / 0xffff
				s01gu = s01gu * ma / 0xffff
				s01bu = s01bu * ma / 0xffff
				s01au = s01au * ma / 0xffff
			}
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sr.Min.X+int(sx1), sr.Min.Y+int(sy1)).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(sx1), smp.Y+sr.Min.Y+int(sy1)).RGBA()
				s11ru = s11ru * ma / 0xffff
				s11gu = s11gu * ma / 0xffff
				s11bu = s11bu * ma / 0xffff
				s11au = s11au * ma / 0xffff
			}
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			if dstMask != nil {
				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
				pa1 := 0xffff - ma
				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			} else {
				dstColorRGBA64.R = uint16(pr)
				dstColorRGBA64.G = uint16(pg)
				dstColorRGBA64.B = uint16(pb)
				dstColorRGBA64.A = uint16(pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			}
		}
	}
}

func (ablInterpolator) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
			s00ru := uint32(src.Pix[s00i]) * 0x101
			s00r := float64(s00ru)
			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X)
			s10ru := uint32(src.Pix[s10i]) * 0x101
			s10r := float64(s10ru)
			s10r = xFrac1*s00r + xFrac0*s10r
			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0 - src.Rect.Min.X)
			s01ru := uint32(src.Pix[s01i]) * 0x101
			s01r := float64(s01ru)
			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1 - src.Rect.Min.X)
			s11ru := uint32(src.Pix[s11i]) * 0x101
			s11r := float64(s11ru)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11r = yFrac1*s10r + yFrac0*s11r
			pr := uint32(s11r)
			out := uint8(pr >> 8)
			dst.Pix[d+0] = out
			dst.Pix[d+1] = out
			dst.Pix[d+2] = out
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00ru := uint32(src.Pix[s00i+0]) * s00au / 0xff
			s00gu := uint32(src.Pix[s00i+1]) * s00au / 0xff
			s00bu := uint32(src.Pix[s00i+2]) * s00au / 0xff
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10ru := uint32(src.Pix[s10i+0]) * s10au / 0xff
			s10gu := uint32(src.Pix[s10i+1]) * s10au / 0xff
			s10bu := uint32(src.Pix[s10i+2]) * s10au / 0xff
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
			s01gu := uint32(src.Pix[s01i+1]) * s01au / 0xff
			s01bu := uint32(src.Pix[s01i+2]) * s01au / 0xff
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11ru := uint32(src.Pix[s11i+0]) * s11au / 0xff
			s11gu := uint32(src.Pix[s11i+1]) * s11au / 0xff
			s11bu := uint32(src.Pix[s11i+2]) * s11au / 0xff
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (ablInterpolator) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00ru := uint32(src.Pix[s00i+0]) * s00au / 0xff
			s00gu := uint32(src.Pix[s00i+1]) * s00au / 0xff
			s00bu := uint32(src.Pix[s00i+2]) * s00au / 0xff
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10ru := uint32(src.Pix[s10i+0]) * s10au / 0xff
			s10gu := uint32(src.Pix[s10i+1]) * s10au / 0xff
			s10bu := uint32(src.Pix[s10i+2]) * s10au / 0xff
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01ru := uint32(src.Pix[s01i+0]) * s01au / 0xff
			s01gu := uint32(src.Pix[s01i+1]) * s01au / 0xff
			s01bu := uint32(src.Pix[s01i+2]) * s01au / 0xff
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11ru := uint32(src.Pix[s11i+0]) * s11au / 0xff
			s11gu := uint32(src.Pix[s11i+1]) * s11au / 0xff
			s11bu := uint32(src.Pix[s11i+2]) * s11au / 0xff
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (ablInterpolator) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s00ru := uint32(src.Pix[s00i+0]) * 0x101
			s00gu := uint32(src.Pix[s00i+1]) * 0x101
			s00bu := uint32(src.Pix[s00i+2]) * 0x101
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s10ru := uint32(src.Pix[s10i+0]) * 0x101
			s10gu := uint32(src.Pix[s10i+1]) * 0x101
			s10bu := uint32(src.Pix[s10i+2]) * 0x101
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s01ru := uint32(src.Pix[s01i+0]) * 0x101
			s01gu := uint32(src.Pix[s01i+1]) * 0x101
			s01bu := uint32(src.Pix[s01i+2]) * 0x101
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s11ru := uint32(src.Pix[s11i+0]) * 0x101
			s11gu := uint32(src.Pix[s11i+1]) * 0x101
			s11bu := uint32(src.Pix[s11i+2]) * 0x101
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (ablInterpolator) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s00ru := uint32(src.Pix[s00i+0]) * 0x101
			s00gu := uint32(src.Pix[s00i+1]) * 0x101
			s00bu := uint32(src.Pix[s00i+2]) * 0x101
			s00au := uint32(src.Pix[s00i+3]) * 0x101
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10i := (sy0-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s10ru := uint32(src.Pix[s10i+0]) * 0x101
			s10gu := uint32(src.Pix[s10i+1]) * 0x101
			s10bu := uint32(src.Pix[s10i+2]) * 0x101
			s10au := uint32(src.Pix[s10i+3]) * 0x101
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01i := (sy1-src.Rect.Min.Y)*src.Stride + (sx0-src.Rect.Min.X)*4
			s01ru := uint32(src.Pix[s01i+0]) * 0x101
			s01gu := uint32(src.Pix[s01i+1]) * 0x101
			s01bu := uint32(src.Pix[s01i+2]) * 0x101
			s01au := uint32(src.Pix[s01i+3]) * 0x101
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11i := (sy1-src.Rect.Min.Y)*src.Stride + (sx1-src.Rect.Min.X)*4
			s11ru := uint32(src.Pix[s11i+0]) * 0x101
			s11gu := uint32(src.Pix[s11i+1]) * 0x101
			s11bu := uint32(src.Pix[s11i+2]) * 0x101
			s11au := uint32(src.Pix[s11i+3]) * 0x101
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (ablInterpolator) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s00j := (sy0-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s10j := (sy0-src.Rect.Min.Y)*src.CStride + (sx1 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s01j := (sy1-src.Rect.Min.Y)*src.CStride + (sx0 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s11j := (sy1-src.Rect.Min.Y)*src.CStride + (sx1 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s00j := (sy0-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s10j := (sy0-src.Rect.Min.Y)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s01j := (sy1-src.Rect.Min.Y)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s11j := (sy1-src.Rect.Min.Y)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s00j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s10j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + ((sx0)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s11j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + ((sx1)/2 - src.Rect.Min.X/2)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00i := (sy0-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s00j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s00yy1 := int(src.Y[s00i]) * 0x10101
			s00cb1 := int(src.Cb[s00j]) - 128
			s00cr1 := int(src.Cr[s00j]) - 128
			s00ru := (s00yy1 + 91881*s00cr1) >> 8
			s00gu := (s00yy1 - 22554*s00cb1 - 46802*s00cr1) >> 8
			s00bu := (s00yy1 + 116130*s00cb1) >> 8
			if s00ru < 0 {
				s00ru = 0
			} else if s00ru > 0xffff {
				s00ru = 0xffff
			}
			if s00gu < 0 {
				s00gu = 0
			} else if s00gu > 0xffff {
				s00gu = 0xffff
			}
			if s00bu < 0 {
				s00bu = 0
			} else if s00bu > 0xffff {
				s00bu = 0xffff
			}

			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s10i := (sy0-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s10j := ((sy0)/2-src.Rect.Min.Y/2)*src.CStride + (sx1 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s10yy1 := int(src.Y[s10i]) * 0x10101
			s10cb1 := int(src.Cb[s10j]) - 128
			s10cr1 := int(src.Cr[s10j]) - 128
			s10ru := (s10yy1 + 91881*s10cr1) >> 8
			s10gu := (s10yy1 - 22554*s10cb1 - 46802*s10cr1) >> 8
			s10bu := (s10yy1 + 116130*s10cb1) >> 8
			if s10ru < 0 {
				s10ru = 0
			} else if s10ru > 0xffff {
				s10ru = 0xffff
			}
			if s10gu < 0 {
				s10gu = 0
			} else if s10gu > 0xffff {
				s10gu = 0xffff
			}
			if s10bu < 0 {
				s10bu = 0
			} else if s10bu > 0xffff {
				s10bu = 0xffff
			}

			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s01i := (sy1-src.Rect.Min.Y)*src.YStride + (sx0 - src.Rect.Min.X)
			s01j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + (sx0 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s01yy1 := int(src.Y[s01i]) * 0x10101
			s01cb1 := int(src.Cb[s01j]) - 128
			s01cr1 := int(src.Cr[s01j]) - 128
			s01ru := (s01yy1 + 91881*s01cr1) >> 8
			s01gu := (s01yy1 - 22554*s01cb1 - 46802*s01cr1) >> 8
			s01bu := (s01yy1 + 116130*s01cb1) >> 8
			if s01ru < 0 {
				s01ru = 0
			} else if s01ru > 0xffff {
				s01ru = 0xffff
			}
			if s01gu < 0 {
				s01gu = 0
			} else if s01gu > 0xffff {
				s01gu = 0xffff
			}
			if s01bu < 0 {
				s01bu = 0
			} else if s01bu > 0xffff {
				s01bu = 0xffff
			}

			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s11i := (sy1-src.Rect.Min.Y)*src.YStride + (sx1 - src.Rect.Min.X)
			s11j := ((sy1)/2-src.Rect.Min.Y/2)*src.CStride + (sx1 - src.Rect.Min.X)

			// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
			s11yy1 := int(src.Y[s11i]) * 0x10101
			s11cb1 := int(src.Cb[s11j]) - 128
			s11cr1 := int(src.Cr[s11j]) - 128
			s11ru := (s11yy1 + 91881*s11cr1) >> 8
			s11gu := (s11yy1 - 22554*s11cb1 - 46802*s11cr1) >> 8
			s11bu := (s11yy1 + 116130*s11cb1) >> 8
			if s11ru < 0 {
				s11ru = 0
			} else if s11ru > 0xffff {
				s11ru = 0xffff
			}
			if s11gu < 0 {
				s11gu = 0
			} else if s11gu > 0xffff {
				s11gu = 0xffff
			}
			if s11bu < 0 {
				s11bu = 0
			} else if s11bu > 0xffff {
				s11bu = 0xffff
			}

			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (ablInterpolator) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sx0, sy0).RGBA()
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sx1, sy0).RGBA()
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sx1, sy1).RGBA()
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			pa1 := (0xffff - pa) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa) >> 8)
		}
	}
}

func (ablInterpolator) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sx0, sy0).RGBA()
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sx1, sy0).RGBA()
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sx1, sy1).RGBA()
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			dst.Pix[d+0] = uint8(pr >> 8)
			dst.Pix[d+1] = uint8(pg >> 8)
			dst.Pix[d+2] = uint8(pb >> 8)
			dst.Pix[d+3] = uint8(pa >> 8)
		}
	}
}

func (ablInterpolator) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sx0, sy0).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA()
				s00ru = s00ru * ma / 0xffff
				s00gu = s00gu * ma / 0xffff
				s00bu = s00bu * ma / 0xffff
				s00au = s00au * ma / 0xffff
			}
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sx1, sy0).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA()
				s10ru = s10ru * ma / 0xffff
				s10gu = s10gu * ma / 0xffff
				s10bu = s10bu * ma / 0xffff
				s10au = s10au * ma / 0xffff
			}
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
				s01ru = s01ru * ma / 0xffff
				s01gu = s01gu * ma / 0xffff
				s01bu = s01bu * ma / 0xffff
				s01au = s01au * ma / 0xffff
			}
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sx1, sy1).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA()
				s11ru = s11ru * ma / 0xffff
				s11gu = s11gu * ma / 0xffff
				s11bu = s11bu * ma / 0xffff
				s11au = s11au * ma / 0xffff
			}
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
			if dstMask != nil {
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
			}
			pa1 := 0xffff - pa
			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
			dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
			dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
		}
	}
}

func (ablInterpolator) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, opts *Options) {
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			sx -= 0.5
			sx0 := int(sx)
			xFrac0 := sx - float64(sx0)
			xFrac1 := 1 - xFrac0
			sx0 += bias.X
			sx1 := sx0 + 1
			if sx0 < sr.Min.X {
				sx0, sx1 = sr.Min.X, sr.Min.X
				xFrac0, xFrac1 = 0, 1
			} else if sx1 >= sr.Max.X {
				sx0, sx1 = sr.Max.X-1, sr.Max.X-1
				xFrac0, xFrac1 = 1, 0
			}

			sy -= 0.5
			sy0 := int(sy)
			yFrac0 := sy - float64(sy0)
			yFrac1 := 1 - yFrac0
			sy0 += bias.Y
			sy1 := sy0 + 1
			if sy0 < sr.Min.Y {
				sy0, sy1 = sr.Min.Y, sr.Min.Y
				yFrac0, yFrac1 = 0, 1
			} else if sy1 >= sr.Max.Y {
				sy0, sy1 = sr.Max.Y-1, sr.Max.Y-1
				yFrac0, yFrac1 = 1, 0
			}

			s00ru, s00gu, s00bu, s00au := src.At(sx0, sy0).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy0).RGBA()
				s00ru = s00ru * ma / 0xffff
				s00gu = s00gu * ma / 0xffff
				s00bu = s00bu * ma / 0xffff
				s00au = s00au * ma / 0xffff
			}
			s00r := float64(s00ru)
			s00g := float64(s00gu)
			s00b := float64(s00bu)
			s00a := float64(s00au)
			s10ru, s10gu, s10bu, s10au := src.At(sx1, sy0).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy0).RGBA()
				s10ru = s10ru * ma / 0xffff
				s10gu = s10gu * ma / 0xffff
				s10bu = s10bu * ma / 0xffff
				s10au = s10au * ma / 0xffff
			}
			s10r := float64(s10ru)
			s10g := float64(s10gu)
			s10b := float64(s10bu)
			s10a := float64(s10au)
			s10r = xFrac1*s00r + xFrac0*s10r
			s10g = xFrac1*s00g + xFrac0*s10g
			s10b = xFrac1*s00b + xFrac0*s10b
			s10a = xFrac1*s00a + xFrac0*s10a
			s01ru, s01gu, s01bu, s01au := src.At(sx0, sy1).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx0, smp.Y+sy1).RGBA()
				s01ru = s01ru * ma / 0xffff
				s01gu = s01gu * ma / 0xffff
				s01bu = s01bu * ma / 0xffff
				s01au = s01au * ma / 0xffff
			}
			s01r := float64(s01ru)
			s01g := float64(s01gu)
			s01b := float64(s01bu)
			s01a := float64(s01au)
			s11ru, s11gu, s11bu, s11au := src.At(sx1, sy1).RGBA()
			if srcMask != nil {
				_, _, _, ma := srcMask.At(smp.X+sx1, smp.Y+sy1).RGBA()
				s11ru = s11ru * ma / 0xffff
				s11gu = s11gu * ma / 0xffff
				s11bu = s11bu * ma / 0xffff
				s11au = s11au * ma / 0xffff
			}
			s11r := float64(s11ru)
			s11g := float64(s11gu)
			s11b := float64(s11bu)
			s11a := float64(s11au)
			s11r = xFrac1*s01r + xFrac0*s11r
			s11g = xFrac1*s01g + xFrac0*s11g
			s11b = xFrac1*s01b + xFrac0*s11b
			s11a = xFrac1*s01a + xFrac0*s11a
			s11r = yFrac1*s10r + yFrac0*s11r
			s11g = yFrac1*s10g + yFrac0*s11g
			s11b = yFrac1*s10b + yFrac0*s11b
			s11a = yFrac1*s10a + yFrac0*s11a
			pr := uint32(s11r)
			pg := uint32(s11g)
			pb := uint32(s11b)
			pa := uint32(s11a)
			if dstMask != nil {
				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr = pr * ma / 0xffff
				pg = pg * ma / 0xffff
				pb = pb * ma / 0xffff
				pa = pa * ma / 0xffff
				pa1 := 0xffff - ma
				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			} else {
				dstColorRGBA64.R = uint16(pr)
				dstColorRGBA64.G = uint16(pg)
				dstColorRGBA64.B = uint16(pb)
				dstColorRGBA64.A = uint16(pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			}
		}
	}
}

func (z *kernelScaler) Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, op Op, opts *Options) {
	if z.dw != int32(dr.Dx()) || z.dh != int32(dr.Dy()) || z.sw != int32(sr.Dx()) || z.sh != int32(sr.Dy()) {
		z.kernel.Scale(dst, dr, src, sr, op, opts)
		return
	}

	var o Options
	if opts != nil {
		o = *opts
	}

	// adr is the affected destination pixels.
	adr := dst.Bounds().Intersect(dr)
	adr, o.DstMask = clipAffectedDestRect(adr, o.DstMask, o.DstMaskP)
	if adr.Empty() || sr.Empty() {
		return
	}
	// Make adr relative to dr.Min.
	adr = adr.Sub(dr.Min)
	if op == Over && o.SrcMask == nil && opaque(src) {
		op = Src
	}

	if _, ok := src.(*image.Uniform); ok && o.DstMask == nil && o.SrcMask == nil && sr.In(src.Bounds()) {
		Draw(dst, dr, src, src.Bounds().Min, op)
		return
	}

	// Create a temporary buffer:
	// scaleX distributes the source image's columns over the temporary image.
	// scaleY distributes the temporary image's rows over the destination image.
	var tmp [][4]float64
	if z.pool.New != nil {
		tmpp := z.pool.Get().(*[][4]float64)
		defer z.pool.Put(tmpp)
		tmp = *tmpp
	} else {
		tmp = z.makeTmpBuf()
	}

	// sr is the source pixels. If it extends beyond the src bounds,
	// we cannot use the type-specific fast paths, as they access
	// the Pix fields directly without bounds checking.
	//
	// Similarly, the fast paths assume that the masks are nil.
	if o.SrcMask != nil || !sr.In(src.Bounds()) {
		z.scaleX_Image(tmp, src, sr, &o)
	} else {
		switch src := src.(type) {
		case *image.Gray:
			z.scaleX_Gray(tmp, src, sr, &o)
		case *image.NRGBA:
			z.scaleX_NRGBA(tmp, src, sr, &o)
		case *image.RGBA:
			z.scaleX_RGBA(tmp, src, sr, &o)
		case *image.YCbCr:
			switch src.SubsampleRatio {
			default:
				z.scaleX_Image(tmp, src, sr, &o)
			case image.YCbCrSubsampleRatio444:
				z.scaleX_YCbCr444(tmp, src, sr, &o)
			case image.YCbCrSubsampleRatio422:
				z.scaleX_YCbCr422(tmp, src, sr, &o)
			case image.YCbCrSubsampleRatio420:
				z.scaleX_YCbCr420(tmp, src, sr, &o)
			case image.YCbCrSubsampleRatio440:
				z.scaleX_YCbCr440(tmp, src, sr, &o)
			}
		default:
			z.scaleX_Image(tmp, src, sr, &o)
		}
	}

	if o.DstMask != nil {
		switch op {
		case Over:
			z.scaleY_Image_Over(dst, dr, adr, tmp, &o)
		case Src:
			z.scaleY_Image_Src(dst, dr, adr, tmp, &o)
		}
	} else {
		switch op {
		case Over:
			switch dst := dst.(type) {
			case *image.RGBA:
				z.scaleY_RGBA_Over(dst, dr, adr, tmp, &o)
			default:
				z.scaleY_Image_Over(dst, dr, adr, tmp, &o)
			}
		case Src:
			switch dst := dst.(type) {
			case *image.RGBA:
				z.scaleY_RGBA_Src(dst, dr, adr, tmp, &o)
			default:
				z.scaleY_Image_Src(dst, dr, adr, tmp, &o)
			}
		}
	}
}

func (q *Kernel) Transform(dst Image, s2d f64.Aff3, src image.Image, sr image.Rectangle, op Op, opts *Options) {
	var o Options
	if opts != nil {
		o = *opts
	}

	dr := transformRect(&s2d, &sr)
	// adr is the affected destination pixels.
	adr := dst.Bounds().Intersect(dr)
	adr, o.DstMask = clipAffectedDestRect(adr, o.DstMask, o.DstMaskP)
	if adr.Empty() || sr.Empty() {
		return
	}
	if op == Over && o.SrcMask == nil && opaque(src) {
		op = Src
	}
	d2s := invert(&s2d)
	// bias is a translation of the mapping from dst coordinates to src
	// coordinates such that the latter temporarily have non-negative X
	// and Y coordinates. This allows us to write int(f) instead of
	// int(math.Floor(f)), since "round to zero" and "round down" are
	// equivalent when f >= 0, but the former is much cheaper. The X--
	// and Y-- are because the TransformLeaf methods have a "sx -= 0.5"
	// adjustment.
	bias := transformRect(&d2s, &adr).Min
	bias.X--
	bias.Y--
	d2s[2] -= float64(bias.X)
	d2s[5] -= float64(bias.Y)
	// Make adr relative to dr.Min.
	adr = adr.Sub(dr.Min)

	if u, ok := src.(*image.Uniform); ok && o.DstMask != nil && o.SrcMask != nil && sr.In(src.Bounds()) {
		transform_Uniform(dst, dr, adr, &d2s, u, sr, bias, op)
		return
	}

	xscale := abs(d2s[0])
	if s := abs(d2s[1]); xscale < s {
		xscale = s
	}
	yscale := abs(d2s[3])
	if s := abs(d2s[4]); yscale < s {
		yscale = s
	}

	// sr is the source pixels. If it extends beyond the src bounds,
	// we cannot use the type-specific fast paths, as they access
	// the Pix fields directly without bounds checking.
	//
	// Similarly, the fast paths assume that the masks are nil.
	if o.DstMask != nil || o.SrcMask != nil || !sr.In(src.Bounds()) {
		switch op {
		case Over:
			q.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
		case Src:
			q.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
		}
	} else {
		switch op {
		case Over:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.NRGBA:
					q.transform_RGBA_NRGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				case *image.RGBA:
					q.transform_RGBA_RGBA_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				default:
					q.transform_RGBA_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				}
			default:
				switch src := src.(type) {
				default:
					q.transform_Image_Image_Over(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				}
			}
		case Src:
			switch dst := dst.(type) {
			case *image.RGBA:
				switch src := src.(type) {
				case *image.Gray:
					q.transform_RGBA_Gray_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				case *image.NRGBA:
					q.transform_RGBA_NRGBA_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				case *image.RGBA:
					q.transform_RGBA_RGBA_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				case *image.YCbCr:
					switch src.SubsampleRatio {
					default:
						q.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
					case image.YCbCrSubsampleRatio444:
						q.transform_RGBA_YCbCr444_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
					case image.YCbCrSubsampleRatio422:
						q.transform_RGBA_YCbCr422_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
					case image.YCbCrSubsampleRatio420:
						q.transform_RGBA_YCbCr420_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
					case image.YCbCrSubsampleRatio440:
						q.transform_RGBA_YCbCr440_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
					}
				default:
					q.transform_RGBA_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				}
			default:
				switch src := src.(type) {
				default:
					q.transform_Image_Image_Src(dst, dr, adr, &d2s, src, sr, bias, xscale, yscale, &o)
				}
			}
		}
	}
}

func (z *kernelScaler) scaleX_Gray(tmp [][4]float64, src *image.Gray, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.Stride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)
				pru := uint32(src.Pix[pi]) * 0x101
				pr += float64(pru) * c.weight
			}
			pr *= s.invTotalWeightFFFF
			tmp[t] = [4]float64{
				pr,
				pr,
				pr,
				1,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_NRGBA(tmp [][4]float64, src *image.NRGBA, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb, pa float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(c.coord)-src.Rect.Min.X)*4
				pau := uint32(src.Pix[pi+3]) * 0x101
				pru := uint32(src.Pix[pi+0]) * pau / 0xff
				pgu := uint32(src.Pix[pi+1]) * pau / 0xff
				pbu := uint32(src.Pix[pi+2]) * pau / 0xff
				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
				pa += float64(pau) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				pa * s.invTotalWeightFFFF,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_RGBA(tmp [][4]float64, src *image.RGBA, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb, pa float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.Stride + (sr.Min.X+int(c.coord)-src.Rect.Min.X)*4
				pru := uint32(src.Pix[pi+0]) * 0x101
				pgu := uint32(src.Pix[pi+1]) * 0x101
				pbu := uint32(src.Pix[pi+2]) * 0x101
				pau := uint32(src.Pix[pi+3]) * 0x101
				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
				pa += float64(pau) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				pa * s.invTotalWeightFFFF,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_YCbCr444(tmp [][4]float64, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)
				pj := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.CStride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)

				// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
				pyy1 := int(src.Y[pi]) * 0x10101
				pcb1 := int(src.Cb[pj]) - 128
				pcr1 := int(src.Cr[pj]) - 128
				pru := (pyy1 + 91881*pcr1) >> 8
				pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
				pbu := (pyy1 + 116130*pcb1) >> 8
				if pru < 0 {
					pru = 0
				} else if pru > 0xffff {
					pru = 0xffff
				}
				if pgu < 0 {
					pgu = 0
				} else if pgu > 0xffff {
					pgu = 0xffff
				}
				if pbu < 0 {
					pbu = 0
				} else if pbu > 0xffff {
					pbu = 0xffff
				}

				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				1,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_YCbCr422(tmp [][4]float64, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)
				pj := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.CStride + ((sr.Min.X+int(c.coord))/2 - src.Rect.Min.X/2)

				// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
				pyy1 := int(src.Y[pi]) * 0x10101
				pcb1 := int(src.Cb[pj]) - 128
				pcr1 := int(src.Cr[pj]) - 128
				pru := (pyy1 + 91881*pcr1) >> 8
				pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
				pbu := (pyy1 + 116130*pcb1) >> 8
				if pru < 0 {
					pru = 0
				} else if pru > 0xffff {
					pru = 0xffff
				}
				if pgu < 0 {
					pgu = 0
				} else if pgu > 0xffff {
					pgu = 0xffff
				}
				if pbu < 0 {
					pbu = 0
				} else if pbu > 0xffff {
					pbu = 0xffff
				}

				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				1,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_YCbCr420(tmp [][4]float64, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)
				pj := ((sr.Min.Y+int(y))/2-src.Rect.Min.Y/2)*src.CStride + ((sr.Min.X+int(c.coord))/2 - src.Rect.Min.X/2)

				// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
				pyy1 := int(src.Y[pi]) * 0x10101
				pcb1 := int(src.Cb[pj]) - 128
				pcr1 := int(src.Cr[pj]) - 128
				pru := (pyy1 + 91881*pcr1) >> 8
				pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
				pbu := (pyy1 + 116130*pcb1) >> 8
				if pru < 0 {
					pru = 0
				} else if pru > 0xffff {
					pru = 0xffff
				}
				if pgu < 0 {
					pgu = 0
				} else if pgu > 0xffff {
					pgu = 0xffff
				}
				if pbu < 0 {
					pbu = 0
				} else if pbu > 0xffff {
					pbu = 0xffff
				}

				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				1,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_YCbCr440(tmp [][4]float64, src *image.YCbCr, sr image.Rectangle, opts *Options) {
	t := 0
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pi := (sr.Min.Y+int(y)-src.Rect.Min.Y)*src.YStride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)
				pj := ((sr.Min.Y+int(y))/2-src.Rect.Min.Y/2)*src.CStride + (sr.Min.X + int(c.coord) - src.Rect.Min.X)

				// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
				pyy1 := int(src.Y[pi]) * 0x10101
				pcb1 := int(src.Cb[pj]) - 128
				pcr1 := int(src.Cr[pj]) - 128
				pru := (pyy1 + 91881*pcr1) >> 8
				pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
				pbu := (pyy1 + 116130*pcb1) >> 8
				if pru < 0 {
					pru = 0
				} else if pru > 0xffff {
					pru = 0xffff
				}
				if pgu < 0 {
					pgu = 0
				} else if pgu > 0xffff {
					pgu = 0xffff
				}
				if pbu < 0 {
					pbu = 0
				} else if pbu > 0xffff {
					pbu = 0xffff
				}

				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				1,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleX_Image(tmp [][4]float64, src image.Image, sr image.Rectangle, opts *Options) {
	t := 0
	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	for y := int32(0); y < z.sh; y++ {
		for _, s := range z.horizontal.sources {
			var pr, pg, pb, pa float64
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				pru, pgu, pbu, pau := src.At(sr.Min.X+int(c.coord), sr.Min.Y+int(y)).RGBA()
				if srcMask != nil {
					_, _, _, ma := srcMask.At(smp.X+sr.Min.X+int(c.coord), smp.Y+sr.Min.Y+int(y)).RGBA()
					pru = pru * ma / 0xffff
					pgu = pgu * ma / 0xffff
					pbu = pbu * ma / 0xffff
					pau = pau * ma / 0xffff
				}
				pr += float64(pru) * c.weight
				pg += float64(pgu) * c.weight
				pb += float64(pbu) * c.weight
				pa += float64(pau) * c.weight
			}
			tmp[t] = [4]float64{
				pr * s.invTotalWeightFFFF,
				pg * s.invTotalWeightFFFF,
				pb * s.invTotalWeightFFFF,
				pa * s.invTotalWeightFFFF,
			}
			t++
		}
	}
}

func (z *kernelScaler) scaleY_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
		d := (dr.Min.Y+adr.Min.Y-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+int(dx)-dst.Rect.Min.X)*4
		for _, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
			var pr, pg, pb, pa float64
			for _, c := range z.vertical.contribs[s.i:s.j] {
				p := &tmp[c.coord*z.dw+dx]
				pr += p[0] * c.weight
				pg += p[1] * c.weight
				pb += p[2] * c.weight
				pa += p[3] * c.weight
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			pr0 := uint32(ftou(pr * s.invTotalWeight))
			pg0 := uint32(ftou(pg * s.invTotalWeight))
			pb0 := uint32(ftou(pb * s.invTotalWeight))
			pa0 := uint32(ftou(pa * s.invTotalWeight))
			pa1 := (0xffff - uint32(pa0)) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr0) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg0) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb0) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa0) >> 8)
			d += dst.Stride
		}
	}
}

func (z *kernelScaler) scaleY_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
		d := (dr.Min.Y+adr.Min.Y-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+int(dx)-dst.Rect.Min.X)*4
		for _, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
			var pr, pg, pb, pa float64
			for _, c := range z.vertical.contribs[s.i:s.j] {
				p := &tmp[c.coord*z.dw+dx]
				pr += p[0] * c.weight
				pg += p[1] * c.weight
				pb += p[2] * c.weight
				pa += p[3] * c.weight
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			dst.Pix[d+0] = uint8(ftou(pr*s.invTotalWeight) >> 8)
			dst.Pix[d+1] = uint8(ftou(pg*s.invTotalWeight) >> 8)
			dst.Pix[d+2] = uint8(ftou(pb*s.invTotalWeight) >> 8)
			dst.Pix[d+3] = uint8(ftou(pa*s.invTotalWeight) >> 8)
			d += dst.Stride
		}
	}
}

func (z *kernelScaler) scaleY_Image_Over(dst Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
		for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
			var pr, pg, pb, pa float64
			for _, c := range z.vertical.contribs[s.i:s.j] {
				p := &tmp[c.coord*z.dw+dx]
				pr += p[0] * c.weight
				pg += p[1] * c.weight
				pb += p[2] * c.weight
				pa += p[3] * c.weight
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
			pr0 := uint32(ftou(pr * s.invTotalWeight))
			pg0 := uint32(ftou(pg * s.invTotalWeight))
			pb0 := uint32(ftou(pb * s.invTotalWeight))
			pa0 := uint32(ftou(pa * s.invTotalWeight))
			if dstMask != nil {
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
				pr0 = pr0 * ma / 0xffff
				pg0 = pg0 * ma / 0xffff
				pb0 = pb0 * ma / 0xffff
				pa0 = pa0 * ma / 0xffff
			}
			pa1 := 0xffff - pa0
			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr0)
			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg0)
			dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb0)
			dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa0)
			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColor)
		}
	}
}

func (z *kernelScaler) scaleY_Image_Src(dst Image, dr, adr image.Rectangle, tmp [][4]float64, opts *Options) {
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
		for dy, s := range z.vertical.sources[adr.Min.Y:adr.Max.Y] {
			var pr, pg, pb, pa float64
			for _, c := range z.vertical.contribs[s.i:s.j] {
				p := &tmp[c.coord*z.dw+dx]
				pr += p[0] * c.weight
				pg += p[1] * c.weight
				pb += p[2] * c.weight
				pa += p[3] * c.weight
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			if dstMask != nil {
				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(adr.Min.Y+dy)).RGBA()
				pr := uint32(ftou(pr*s.invTotalWeight)) * ma / 0xffff
				pg := uint32(ftou(pg*s.invTotalWeight)) * ma / 0xffff
				pb := uint32(ftou(pb*s.invTotalWeight)) * ma / 0xffff
				pa := uint32(ftou(pa*s.invTotalWeight)) * ma / 0xffff
				pa1 := 0xffff - ma
				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColor)
			} else {
				dstColorRGBA64.R = ftou(pr * s.invTotalWeight)
				dstColorRGBA64.G = ftou(pg * s.invTotalWeight)
				dstColorRGBA64.B = ftou(pb * s.invTotalWeight)
				dstColorRGBA64.A = ftou(pa * s.invTotalWeight)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(adr.Min.Y+dy), dstColor)
			}
		}
	}
}

func (q *Kernel) transform_RGBA_Gray_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.Gray, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx - src.Rect.Min.X)
							pru := uint32(src.Pix[pi]) * 0x101
							pr += float64(pru) * w
						}
					}
				}
			}
			out := uint8(fffftou(pr) >> 8)
			dst.Pix[d+0] = out
			dst.Pix[d+1] = out
			dst.Pix[d+2] = out
			dst.Pix[d+3] = 0xff
		}
	}
}

func (q *Kernel) transform_RGBA_NRGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
							pau := uint32(src.Pix[pi+3]) * 0x101
							pru := uint32(src.Pix[pi+0]) * pau / 0xff
							pgu := uint32(src.Pix[pi+1]) * pau / 0xff
							pbu := uint32(src.Pix[pi+2]) * pau / 0xff
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			pr0 := uint32(fffftou(pr))
			pg0 := uint32(fffftou(pg))
			pb0 := uint32(fffftou(pb))
			pa0 := uint32(fffftou(pa))
			pa1 := (0xffff - uint32(pa0)) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr0) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg0) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb0) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa0) >> 8)
		}
	}
}

func (q *Kernel) transform_RGBA_NRGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.NRGBA, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
							pau := uint32(src.Pix[pi+3]) * 0x101
							pru := uint32(src.Pix[pi+0]) * pau / 0xff
							pgu := uint32(src.Pix[pi+1]) * pau / 0xff
							pbu := uint32(src.Pix[pi+2]) * pau / 0xff
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = uint8(fffftou(pa) >> 8)
		}
	}
}

func (q *Kernel) transform_RGBA_RGBA_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
							pru := uint32(src.Pix[pi+0]) * 0x101
							pgu := uint32(src.Pix[pi+1]) * 0x101
							pbu := uint32(src.Pix[pi+2]) * 0x101
							pau := uint32(src.Pix[pi+3]) * 0x101
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			pr0 := uint32(fffftou(pr))
			pg0 := uint32(fffftou(pg))
			pb0 := uint32(fffftou(pb))
			pa0 := uint32(fffftou(pa))
			pa1 := (0xffff - uint32(pa0)) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr0) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg0) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb0) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa0) >> 8)
		}
	}
}

func (q *Kernel) transform_RGBA_RGBA_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.RGBA, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.Stride + (kx-src.Rect.Min.X)*4
							pru := uint32(src.Pix[pi+0]) * 0x101
							pgu := uint32(src.Pix[pi+1]) * 0x101
							pbu := uint32(src.Pix[pi+2]) * 0x101
							pau := uint32(src.Pix[pi+3]) * 0x101
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = uint8(fffftou(pa) >> 8)
		}
	}
}

func (q *Kernel) transform_RGBA_YCbCr444_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
							pj := (ky-src.Rect.Min.Y)*src.CStride + (kx - src.Rect.Min.X)

							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
							pyy1 := int(src.Y[pi]) * 0x10101
							pcb1 := int(src.Cb[pj]) - 128
							pcr1 := int(src.Cr[pj]) - 128
							pru := (pyy1 + 91881*pcr1) >> 8
							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
							pbu := (pyy1 + 116130*pcb1) >> 8
							if pru < 0 {
								pru = 0
							} else if pru > 0xffff {
								pru = 0xffff
							}
							if pgu < 0 {
								pgu = 0
							} else if pgu > 0xffff {
								pgu = 0xffff
							}
							if pbu < 0 {
								pbu = 0
							} else if pbu > 0xffff {
								pbu = 0xffff
							}

							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
						}
					}
				}
			}
			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (q *Kernel) transform_RGBA_YCbCr422_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
							pj := (ky-src.Rect.Min.Y)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)

							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
							pyy1 := int(src.Y[pi]) * 0x10101
							pcb1 := int(src.Cb[pj]) - 128
							pcr1 := int(src.Cr[pj]) - 128
							pru := (pyy1 + 91881*pcr1) >> 8
							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
							pbu := (pyy1 + 116130*pcb1) >> 8
							if pru < 0 {
								pru = 0
							} else if pru > 0xffff {
								pru = 0xffff
							}
							if pgu < 0 {
								pgu = 0
							} else if pgu > 0xffff {
								pgu = 0xffff
							}
							if pbu < 0 {
								pbu = 0
							} else if pbu > 0xffff {
								pbu = 0xffff
							}

							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
						}
					}
				}
			}
			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (q *Kernel) transform_RGBA_YCbCr420_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
							pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + ((kx)/2 - src.Rect.Min.X/2)

							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
							pyy1 := int(src.Y[pi]) * 0x10101
							pcb1 := int(src.Cb[pj]) - 128
							pcr1 := int(src.Cr[pj]) - 128
							pru := (pyy1 + 91881*pcr1) >> 8
							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
							pbu := (pyy1 + 116130*pcb1) >> 8
							if pru < 0 {
								pru = 0
							} else if pru > 0xffff {
								pru = 0xffff
							}
							if pgu < 0 {
								pgu = 0
							} else if pgu > 0xffff {
								pgu = 0xffff
							}
							if pbu < 0 {
								pbu = 0
							} else if pbu > 0xffff {
								pbu = 0xffff
							}

							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
						}
					}
				}
			}
			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (q *Kernel) transform_RGBA_YCbCr440_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src *image.YCbCr, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pi := (ky-src.Rect.Min.Y)*src.YStride + (kx - src.Rect.Min.X)
							pj := ((ky)/2-src.Rect.Min.Y/2)*src.CStride + (kx - src.Rect.Min.X)

							// This is an inline version of image/color/ycbcr.go's YCbCr.RGBA method.
							pyy1 := int(src.Y[pi]) * 0x10101
							pcb1 := int(src.Cb[pj]) - 128
							pcr1 := int(src.Cr[pj]) - 128
							pru := (pyy1 + 91881*pcr1) >> 8
							pgu := (pyy1 - 22554*pcb1 - 46802*pcr1) >> 8
							pbu := (pyy1 + 116130*pcb1) >> 8
							if pru < 0 {
								pru = 0
							} else if pru > 0xffff {
								pru = 0xffff
							}
							if pgu < 0 {
								pgu = 0
							} else if pgu > 0xffff {
								pgu = 0xffff
							}
							if pbu < 0 {
								pbu = 0
							} else if pbu > 0xffff {
								pbu = 0xffff
							}

							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
						}
					}
				}
			}
			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = 0xff
		}
	}
}

func (q *Kernel) transform_RGBA_Image_Over(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			pr0 := uint32(fffftou(pr))
			pg0 := uint32(fffftou(pg))
			pb0 := uint32(fffftou(pb))
			pa0 := uint32(fffftou(pa))
			pa1 := (0xffff - uint32(pa0)) * 0x101
			dst.Pix[d+0] = uint8((uint32(dst.Pix[d+0])*pa1/0xffff + pr0) >> 8)
			dst.Pix[d+1] = uint8((uint32(dst.Pix[d+1])*pa1/0xffff + pg0) >> 8)
			dst.Pix[d+2] = uint8((uint32(dst.Pix[d+2])*pa1/0xffff + pb0) >> 8)
			dst.Pix[d+3] = uint8((uint32(dst.Pix[d+3])*pa1/0xffff + pa0) >> 8)
		}
	}
}

func (q *Kernel) transform_RGBA_Image_Src(dst *image.RGBA, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		d := (dr.Min.Y+int(dy)-dst.Rect.Min.Y)*dst.Stride + (dr.Min.X+adr.Min.X-dst.Rect.Min.X)*4
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx, d = dx+1, d+4 {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			dst.Pix[d+0] = uint8(fffftou(pr) >> 8)
			dst.Pix[d+1] = uint8(fffftou(pg) >> 8)
			dst.Pix[d+2] = uint8(fffftou(pb) >> 8)
			dst.Pix[d+3] = uint8(fffftou(pa) >> 8)
		}
	}
}

func (q *Kernel) transform_Image_Image_Over(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
							if srcMask != nil {
								_, _, _, ma := srcMask.At(smp.X+kx, smp.Y+ky).RGBA()
								pru = pru * ma / 0xffff
								pgu = pgu * ma / 0xffff
								pbu = pbu * ma / 0xffff
								pau = pau * ma / 0xffff
							}
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
			pr0 := uint32(fffftou(pr))
			pg0 := uint32(fffftou(pg))
			pb0 := uint32(fffftou(pb))
			pa0 := uint32(fffftou(pa))
			if dstMask != nil {
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr0 = pr0 * ma / 0xffff
				pg0 = pg0 * ma / 0xffff
				pb0 = pb0 * ma / 0xffff
				pa0 = pa0 * ma / 0xffff
			}
			pa1 := 0xffff - pa0
			dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr0)
			dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg0)
			dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb0)
			dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa0)
			dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
		}
	}
}

func (q *Kernel) transform_Image_Image_Src(dst Image, dr, adr image.Rectangle, d2s *f64.Aff3, src image.Image, sr image.Rectangle, bias image.Point, xscale, yscale float64, opts *Options) {
	// When shrinking, broaden the effective kernel support so that we still
	// visit every source pixel.
	xHalfWidth, xKernelArgScale := q.Support, 1.0
	if xscale > 1 {
		xHalfWidth *= xscale
		xKernelArgScale = 1 / xscale
	}
	yHalfWidth, yKernelArgScale := q.Support, 1.0
	if yscale > 1 {
		yHalfWidth *= yscale
		yKernelArgScale = 1 / yscale
	}

	xWeights := make([]float64, 1+2*int(math.Ceil(xHalfWidth)))
	yWeights := make([]float64, 1+2*int(math.Ceil(yHalfWidth)))

	srcMask, smp := opts.SrcMask, opts.SrcMaskP
	dstMask, dmp := opts.DstMask, opts.DstMaskP
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for dy := int32(adr.Min.Y); dy < int32(adr.Max.Y); dy++ {
		dyf := float64(dr.Min.Y+int(dy)) + 0.5
		for dx := int32(adr.Min.X); dx < int32(adr.Max.X); dx++ {
			dxf := float64(dr.Min.X+int(dx)) + 0.5
			sx := d2s[0]*dxf + d2s[1]*dyf + d2s[2]
			sy := d2s[3]*dxf + d2s[4]*dyf + d2s[5]
			if !(image.Point{int(sx) + bias.X, int(sy) + bias.Y}).In(sr) {
				continue
			}

			// TODO: adjust the bias so that we can use int(f) instead
			// of math.Floor(f) and math.Ceil(f).
			sx += float64(bias.X)
			sx -= 0.5
			ix := int(math.Floor(sx - xHalfWidth))
			if ix < sr.Min.X {
				ix = sr.Min.X
			}
			jx := int(math.Ceil(sx + xHalfWidth))
			if jx > sr.Max.X {
				jx = sr.Max.X
			}

			totalXWeight := 0.0
			for kx := ix; kx < jx; kx++ {
				xWeight := 0.0
				if t := abs((sx - float64(kx)) * xKernelArgScale); t < q.Support {
					xWeight = q.At(t)
				}
				xWeights[kx-ix] = xWeight
				totalXWeight += xWeight
			}
			for x := range xWeights[:jx-ix] {
				xWeights[x] /= totalXWeight
			}

			sy += float64(bias.Y)
			sy -= 0.5
			iy := int(math.Floor(sy - yHalfWidth))
			if iy < sr.Min.Y {
				iy = sr.Min.Y
			}
			jy := int(math.Ceil(sy + yHalfWidth))
			if jy > sr.Max.Y {
				jy = sr.Max.Y
			}

			totalYWeight := 0.0
			for ky := iy; ky < jy; ky++ {
				yWeight := 0.0
				if t := abs((sy - float64(ky)) * yKernelArgScale); t < q.Support {
					yWeight = q.At(t)
				}
				yWeights[ky-iy] = yWeight
				totalYWeight += yWeight
			}
			for y := range yWeights[:jy-iy] {
				yWeights[y] /= totalYWeight
			}

			var pr, pg, pb, pa float64
			for ky := iy; ky < jy; ky++ {
				if yWeight := yWeights[ky-iy]; yWeight != 0 {
					for kx := ix; kx < jx; kx++ {
						if w := xWeights[kx-ix] * yWeight; w != 0 {
							pru, pgu, pbu, pau := src.At(kx, ky).RGBA()
							if srcMask != nil {
								_, _, _, ma := srcMask.At(smp.X+kx, smp.Y+ky).RGBA()
								pru = pru * ma / 0xffff
								pgu = pgu * ma / 0xffff
								pbu = pbu * ma / 0xffff
								pau = pau * ma / 0xffff
							}
							pr += float64(pru) * w
							pg += float64(pgu) * w
							pb += float64(pbu) * w
							pa += float64(pau) * w
						}
					}
				}
			}

			if pr > pa {
				pr = pa
			}
			if pg > pa {
				pg = pa
			}
			if pb > pa {
				pb = pa
			}

			if dstMask != nil {
				qr, qg, qb, qa := dst.At(dr.Min.X+int(dx), dr.Min.Y+int(dy)).RGBA()
				_, _, _, ma := dstMask.At(dmp.X+dr.Min.X+int(dx), dmp.Y+dr.Min.Y+int(dy)).RGBA()
				pr := uint32(fffftou(pr)) * ma / 0xffff
				pg := uint32(fffftou(pg)) * ma / 0xffff
				pb := uint32(fffftou(pb)) * ma / 0xffff
				pa := uint32(fffftou(pa)) * ma / 0xffff
				pa1 := 0xffff - ma
				dstColorRGBA64.R = uint16(qr*pa1/0xffff + pr)
				dstColorRGBA64.G = uint16(qg*pa1/0xffff + pg)
				dstColorRGBA64.B = uint16(qb*pa1/0xffff + pb)
				dstColorRGBA64.A = uint16(qa*pa1/0xffff + pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			} else {
				dstColorRGBA64.R = fffftou(pr)
				dstColorRGBA64.G = fffftou(pg)
				dstColorRGBA64.B = fffftou(pb)
				dstColorRGBA64.A = fffftou(pa)
				dst.Set(dr.Min.X+int(dx), dr.Min.Y+int(dy), dstColor)
			}
		}
	}
}
